****
** Collapses appended data by fine groups for RDs.;

%MACRO make_earnings_quantiles();
	proc sort data = df; by byr; run;          
	proc univariate data = df noprint; var past_income; by byr; output out = temp1 pctlpts = 50 pctlpre = p; run;
	proc sql noprint;
		CREATE TABLE temp2 AS SELECT * FROM df A LEFT JOIN temp1 B ON A.byr = B.byr;
		CREATE TABLE df AS SELECT *,
			(CASE WHEN past_income <= p50 THEN 0 ELSE 1 END) AS high_income
			FROM temp2;
%MEND;


** Load data **;
libname secure "";
data df; set secure.sermbrphus_append; run;

** Prep data for collapses **;
data df; set df; white = (race = "W"); retire_age = retire1; byr = year(dob); run;
data df; set df; claim_age = year(doei) + (month(doei) - 1)/12 - year(dob) - (month(dob) - 1)/12; run;
%make_earnings_quantiles();


** Collapse by key covariates (birth year, sex, race, past income) **;
proc sql noprint;
	CREATE TABLE by_byr AS SELECT sum(1) AS num, byr, claim_age, retire_age, sex, white, high_income, life_income,
		sern55, sern56, sern57, sern58, sern59, sern60, sern61, sern62, sern63, sern64, sern65, sern66, sern67, sern68
	FROM df
	GROUP BY byr, claim_age, retire_age, sex, white, high_income;
		
proc export data = by_byr dbms = csv replace
	outfile = "by_byr.csv";
run;


** Collapse by birth date **;
proc sql noprint;
	CREATE TABLE by_bday AS SELECT sum(1) AS num, dob, claim_age, retire_age, sex, white, high_income, life_income,
		sern55, sern56, sern57, sern58, sern59, sern60, sern61, sern62, sern63, sern64, sern65, sern66, sern67, sern68
	FROM df WHERE (byr >= 1932 & byr <= 1943) & (month(dob) <= 3 | month(dob) >= 10)
	GROUP BY dob, claim_age, retire_age, sex, white, high_income;

proc export data = by_bday dbms = csv replace
	outfile = "by_bday.csv";
run;

** No collapse **;
proc sql noprint;
	CREATE TABLE by_ssn AS SELECT sum(1) AS num, dob, claim_age, retire_age, sex, white, high_income, life_income,
		sern55, sern56, sern57, sern58, sern59, sern60, sern61, sern62, sern63, sern64, sern65, sern66, sern67, sern68
	FROM df WHERE (byr >= 1932 & byr <= 1943) & (month(dob) <= 3 | month(dob) >= 10)
	GROUP BY ssn;

proc export data = by_ssn dbms = csv replace
	outfile = "by_ssn.csv";
run;



